rm(list = ls())
setwd("/Users/John/Dropbox/")

# --- Load Required Packages ---
library(lfe)
library(readr)
library(lmtest)
library(sandwich)
library(texreg)

# --- Load the dataset ---
df <- as.data.frame(read_csv("JOP_Replication_Materials/appendix/data/final_dataset_marketsize.csv"))
df <- df[,-1]

# --- OLS Models ---
ols1 <- felm(sqrtta ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + nonept_share_lead4 | 0 | 0 | isic + year, data = df)
ols2 <- felm(sqrtta ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + nonept_share_lead8 | 0 | 0 | isic + year, data = df)
ols3 <- felm(sqrtta ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + total_cn_world_share4 | 0 | 0 | isic + year, data = df)
ols4 <- felm(sqrtta ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + total_cn_world_share8 | 0 | 0 | isic + year, data = df)

# --- Poisson Models ---
pois1 <- glm(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + nonept_share_lead4, data = df, family = "poisson")
pois2 <- glm(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + nonept_share_lead8, data = df, family = "poisson")
pois3 <- glm(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + total_cn_world_share4, data = df, family = "poisson")
pois4 <- glm(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 + total_cn_world_share8, data = df, family = "poisson")

# --- Clustered SEs for Poisson Models ---
pois1_se <- coeftest(pois1, vcov = vcovCL(pois1, cluster = ~isic + year))
pois2_se <- coeftest(pois2, vcov = vcovCL(pois2, cluster = ~isic + year))
pois3_se <- coeftest(pois3, vcov = vcovCL(pois3, cluster = ~isic + year))
pois4_se <- coeftest(pois4, vcov = vcovCL(pois4, cluster = ~isic + year))


# --- Coefficient name mapping ---
coef_map <- list(
  "strategic" = "Strategic",
  "strategic:median_share" = "Strategic x Median Process. Share",
  "nonept_share_lead4" = "China Share (excl. Process), 4-Yr Lead",
  "nonept_share_lead8" = "China Share (excl. Process), 8-Yr Lead",
  "total_cn_world_share4" = "Total China Share, 4-Yr Lead",
  "total_cn_world_share8" = "Total China Share, 8-Yr Lead",
  "median_share" = "Median Process. Share",
  "med_hhi_isic2" = "Geographic Concentration",
  "med_soe_isic2" = "SOE Share",
  "(Intercept)" = "Constant"
)

# --- Custom GOF rows ---
custom_gof <- list(
  "Num. Obs." = c(nobs(ols1), nobs(ols2), nobs(ols3), nobs(ols4), 
                  nobs(pois1), nobs(pois2), nobs(pois3), nobs(pois4)),
  "R$^2$" = c(
    round(summary(ols1)$r.squared, 3),
    round(summary(ols2)$r.squared, 3),
    round(summary(ols3)$r.squared, 3),
    round(summary(ols4)$r.squared, 3),
    "", "", "", ""
  ),
  "Adj. R$^2$" = c(
    round(summary(ols1)$adj.r.squared, 3),
    round(summary(ols2)$adj.r.squared, 3),
    round(summary(ols3)$adj.r.squared, 3),
    round(summary(ols4)$adj.r.squared, 3),
    "", "", "", ""
  ),
  "Log Likelihood" = c(
    round(logLik(ols1), 3),
    round(logLik(ols2), 3),
    round(logLik(ols3), 3),
    round(logLik(ols4), 3),
    round(logLik(pois1), 3),
    round(logLik(pois2), 3),
    round(logLik(pois3), 3),
    round(logLik(pois4), 3)
  )
)


# --- Extractor for glm with cluster-robust SEs ---
extract.robust <- function(model, robust_se) {
  coefs <- coef(model)
  se <- robust_se[, 2]
  pval <- robust_se[, 4]
  names <- rownames(robust_se)
  
  gof <- numeric(0)
  gof_names <- character(0)
  gof_decimal <- logical(0)
  
  # Try logLik
  ll <- suppressWarnings(tryCatch(as.numeric(logLik(model)), error = function(e) NA_real_))
  if (!is.na(ll)) {
    gof <- c(gof, ll)
    gof_names <- c(gof_names, "Log Likelihood")
    gof_decimal <- c(gof_decimal, TRUE)
  }
  
  # Try Num. Obs.
  n <- suppressWarnings(tryCatch(nobs(model), error = function(e) NA_real_))
  if (length(n) == 1 && !is.na(n)) {
    gof <- c(gof, n)
    gof_names <- c(gof_names, "Num. Obs.")
    gof_decimal <- c(gof_decimal, FALSE)
  }
  
  createTexreg(
    coef.names = names,
    coef = coefs[names],
    se = se,
    pvalues = pval,
    gof.names = gof_names,
    gof = gof,
    gof.decimal = gof_decimal
  )
}

# --- Strip GOF from a model ---
strip_all_gof <- function(model) {
  model@gof.names <- character(0)
  model@gof <- numeric(0)
  model@gof.decimal <- logical(0)
  return(model)
}

# --- Model list ---
models <- list(
  strip_all_gof(texreg::extract(ols1)),
  strip_all_gof(texreg::extract(ols2)),
  strip_all_gof(texreg::extract(ols3)),
  strip_all_gof(texreg::extract(ols4)),
  strip_all_gof(extract.robust(pois1, pois1_se)),
  strip_all_gof(extract.robust(pois2, pois2_se)),
  strip_all_gof(extract.robust(pois3, pois3_se)),
  strip_all_gof(extract.robust(pois4, pois4_se))
)

# --- Create LaTeX file from texreg ---
tex_output <- "JOP_Replication_Materials/appendix/output/E_table_5_plain.tex"

texreg(
  models,
  file = tex_output,
  custom.coef.map = coef_map,
  stars = c(0.1, 0.05, 0.01),
  digits = 3,
  custom.gof.rows = custom_gof,
  caption = "Regression Results on Tech Absorption",
  label = "tab:regression_results"
)

# --- Wrap LaTeX for full PDF ---
table_tex_raw <- readLines(tex_output)
start_line <- grep("\\\\begin\\{tabular\\}", table_tex_raw)
end_line <- grep("\\\\end\\{tabular\\}", table_tex_raw)
table_tex <- table_tex_raw[start_line:end_line]

latex_wrapper <- c(
  "\\documentclass[11pt]{article}",
  "\\usepackage[margin=1in]{geometry}",
  "\\usepackage{booktabs}",
  "\\usepackage{caption}",
  "\\usepackage{graphicx}",
  "\\usepackage{float}",
  "\\usepackage{adjustbox}",
  "\\usepackage{amsmath}",
  "\\usepackage{longtable}",
  "\\usepackage{placeins}",
  "\\usepackage{mathptmx}",  
  "\\renewcommand{\\arraystretch}{0.6}",
  "\\begin{document}",
  "\\begin{table}[H]",
  "\\centering",
  "\\caption{Results from Regression Including Measures of Future Market Size}",
  "\\vspace{0.5em}",
  "\\resizebox{\\textwidth}{!}{%",
  table_tex,
  "}",
  "\\end{table}",
  "\\FloatBarrier",
  "\\end{document}"
)

wrapped_file <- "JOP_Replication_Materials/appendix/output/E_table_5_wrapped.tex"
writeLines(latex_wrapper, wrapped_file)

# --- Save PDF ---
tools::texi2pdf(wrapped_file, clean = TRUE)
file.rename("E_table_5_wrapped.pdf", "JOP_Replication_Materials/appendix/output/E_table_5_wrapped.pdf")
browseURL("JOP_Replication_Materials/appendix/output/E_table_5_wrapped.pdf")
